코드 최적화 개요#

강좌: 수치해석 프로젝트

코드 Profiling#

실행 시간, 함수 호출 정보를 측정해서, 계산 시간이 많이 걸리는 Hot-spot 분석

주요 측정 방법#

  • 총 시간 측정

    • BASH shell : time

    • Jupyter : %time, %timeit

  • 함수 호출 측정

  • 라인별 소요 시간 측정

예제#

Jacobi method의 Hot-spot을 찾아라

from scipy import linalg
import numpy as np
def jacobi(n, ti, dt):
    """
    Jacobi method
    
    Parameters
    ----------
    n : integer
        size
    ti : float
        current time
    dt : array
        difference
    """
    for i in range(1, n+1):
        for j in range(1, n+1):
            dt[i, j] = 0.25*(ti[i-1, j] + ti[i, j-1] + ti[i+1, j] + ti[i, j+1]) - ti[i, j]
            
    # Update
    ti += dt
            
def jacobi_v1(n, ti, dt):
    """
    Jacobi method (Vector version)
    
    Parameters
    ----------
    ti : float
        current time
        
    Parameters
    -----------
    dt : array
        difference
    """
    dt[1:-1, 1:-1] = 0.25*(ti[:-2, 1:-1] + ti[1:-1, :-2] + ti[2:, 1:-1] + ti[1:-1, 2:]) - ti[1:-1, 1:-1]
    
    # Update
    ti += dt
def solve_laplace(n, solver, tol=1e-5, order='C'):
    """
    Laplace Equation solver
    
    Parameters
    ----------
    n : integer
        size
    solver : function
        iterative solver
    tol : float
        tolerance
    order : string
        'C' | 'F'
        
    Returns
    -------
    err : float
        residual
    """
    ti = np.zeros((n+2, n+2), order=order)
    dt = np.zeros((n+2, n+2), order=order)

    def bc(t):
        t[-1, 1:-1] = 300
        t[0, 1:-1] = 100
        t[1:-1, -1] = 100
        t[1:-1, 0] = 100

    err = 1
    hist = []
    while err > tol:
        # Apply BC
        bc(ti)

        # Run Gauss-Seidel
        solver(n, ti, dt)

        # Compute Error
        err = linalg.norm(dt) / n
        
    return err

코드 실행 시간 측정#

  • Python loop는 상당히 느림

# Python loop
%time solve_laplace(49, jacobi)
CPU times: user 7.31 s, sys: 1.09 ms, total: 7.31 s
Wall time: 7.31 s
9.997943770878389e-06
# Vector version
%time solve_laplace(49, jacobi_v1)
CPU times: user 103 ms, sys: 0 ns, total: 103 ms
Wall time: 102 ms
9.997943770878389e-06

Profile 결과#

  • jacobi 함수가 대부분의 계산 시간을 차지함

  • Vector version에서는 이 시간이 100배 이상 단축됨

%prun -T prof_jacobi.txt solve_laplace(49, jacobi)
print(open('prof_jacobi.txt').read())
 
*** Profile printout saved to text file 'prof_jacobi.txt'. 
         102346 function calls (97229 primitive calls) in 8.043 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     5117    7.978    0.002    7.978    0.002 3378516824.py:1(jacobi)
     5117    0.011    0.000    0.022    0.000 linalg.py:2363(norm)
     5117    0.010    0.000    0.020    0.000 function_base.py:422(asarray_chkfinite)
10234/5117    0.008    0.000    0.023    0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}
     5117    0.007    0.000    0.007    0.000 3211324933.py:24(bc)
     5117    0.007    0.000    0.007    0.000 {method 'reduce' of 'numpy.ufunc' objects}
     5117    0.006    0.000    0.053    0.000 _misc.py:17(norm)
        1    0.004    0.004    8.043    8.043 3211324933.py:1(solve_laplace)
     5117    0.002    0.000    0.026    0.000 <__array_function__ internals>:2(norm)
     5117    0.002    0.000    0.008    0.000 <__array_function__ internals>:2(dot)
     5117    0.001    0.000    0.010    0.000 {method 'all' of 'numpy.ndarray' objects}
     5117    0.001    0.000    0.008    0.000 _methods.py:60(_all)
     5117    0.001    0.000    0.001    0.000 {method 'ravel' of 'numpy.ndarray' objects}
    10234    0.001    0.000    0.001    0.000 {built-in method numpy.asarray}
     5117    0.001    0.000    0.001    0.000 linalg.py:112(isComplexType)
    10234    0.001    0.000    0.001    0.000 {built-in method builtins.issubclass}
     5117    0.001    0.000    0.001    0.000 multiarray.py:736(dot)
     5117    0.000    0.000    0.000    0.000 linalg.py:2359(_norm_dispatcher)
        1    0.000    0.000    8.043    8.043 {built-in method builtins.exec}
        2    0.000    0.000    0.000    0.000 {built-in method numpy.zeros}
        1    0.000    0.000    8.043    8.043 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
%prun -T prof_jacobi_v1.txt solve_laplace(49, jacobi_v1)
print(open('prof_jacobi_v1.txt').read())
 
*** Profile printout saved to text file 'prof_jacobi_v1.txt'. 
         102346 function calls (97229 primitive calls) in 0.127 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     5117    0.063    0.000    0.063    0.000 3378516824.py:21(jacobi_v1)
     5117    0.011    0.000    0.021    0.000 linalg.py:2363(norm)
     5117    0.009    0.000    0.019    0.000 function_base.py:422(asarray_chkfinite)
     5117    0.007    0.000    0.007    0.000 3211324933.py:24(bc)
10234/5117    0.007    0.000    0.023    0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}
     5117    0.007    0.000    0.007    0.000 {method 'reduce' of 'numpy.ufunc' objects}
     5117    0.006    0.000    0.052    0.000 _misc.py:17(norm)
        1    0.005    0.005    0.127    0.127 3211324933.py:1(solve_laplace)
     5117    0.003    0.000    0.026    0.000 <__array_function__ internals>:2(norm)
     5117    0.002    0.000    0.008    0.000 <__array_function__ internals>:2(dot)
     5117    0.001    0.000    0.009    0.000 {method 'all' of 'numpy.ndarray' objects}
     5117    0.001    0.000    0.008    0.000 _methods.py:60(_all)
    10234    0.001    0.000    0.001    0.000 {built-in method numpy.asarray}
     5117    0.001    0.000    0.001    0.000 {method 'ravel' of 'numpy.ndarray' objects}
     5117    0.001    0.000    0.001    0.000 linalg.py:112(isComplexType)
    10234    0.001    0.000    0.001    0.000 {built-in method builtins.issubclass}
     5117    0.001    0.000    0.001    0.000 multiarray.py:736(dot)
     5117    0.001    0.000    0.001    0.000 linalg.py:2359(_norm_dispatcher)
        2    0.000    0.000    0.000    0.000 {built-in method numpy.zeros}
        1    0.000    0.000    0.127    0.127 {built-in method builtins.exec}
        1    0.000    0.000    0.127    0.127 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}

Line profile#

  • lprof를 이용해 라인별 계산 시간 측정

Note

  • line_profiler 패키지가 설치되어야 함

  • solver 함수가 Hot-spot임

# Line profiler magic key 호출
%load_ext line_profiler
%lprun -T lprof_jacobi.txt -f solve_laplace solve_laplace(49, jacobi)
print(open('lprof_jacobi.txt').read())
*** Profile printout saved to text file 'lprof_jacobi.txt'. 
Timer unit: 1e-09 s

Total time: 10.427 s
File: /tmp/ipykernel_63391/3211324933.py
Function: solve_laplace at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     1                                           def solve_laplace(n, solver, tol=1e-5, order='C'):
     2                                               """
     3                                               Laplace Equation solver
     4                                               
     5                                               Parameters
     6                                               ----------
     7                                               n : integer
     8                                                   size
     9                                               solver : function
    10                                                   iterative solver
    11                                               tol : float
    12                                                   tolerance
    13                                               order : string
    14                                                   'C' | 'F'
    15                                                   
    16                                               Returns
    17                                               -------
    18                                               err : float
    19                                                   residual
    20                                               """
    21         1      10149.0  10149.0      0.0      ti = np.zeros((n+2, n+2), order=order)
    22         1      19958.0  19958.0      0.0      dt = np.zeros((n+2, n+2), order=order)
    23                                           
    24         1        290.0    290.0      0.0      def bc(t):
    25                                                   t[-1, 1:-1] = 300
    26                                                   t[0, 1:-1] = 100
    27                                                   t[1:-1, -1] = 100
    28                                                   t[1:-1, 0] = 100
    29                                           
    30         1        110.0    110.0      0.0      err = 1
    31         1        100.0    100.0      0.0      hist = []
    32      5117     913867.0    178.6      0.0      while err > tol:
    33                                                   # Apply BC
    34      5117    9189306.0   1795.8      0.1          bc(ti)
    35                                           
    36                                                   # Run Gauss-Seidel
    37      5117 10365925356.0 2025781.8     99.4          solver(n, ti, dt)
    38                                           
    39                                                   # Compute Error
    40      5117   50949347.0   9956.9      0.5          err = linalg.norm(dt) / n
    41                                                   
    42         1         80.0     80.0      0.0      return err
%lprun -T lprof_jacobi_inner.txt -f jacobi solve_laplace(49, jacobi)
print(open('lprof_jacobi_inner.txt').read())
*** Profile printout saved to text file 'lprof_jacobi_inner.txt'. 
Timer unit: 1e-09 s

Total time: 12.3775 s
File: /tmp/ipykernel_63391/3378516824.py
Function: jacobi at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     1                                           def jacobi(n, ti, dt):
     2                                               """
     3                                               Jacobi method
     4                                               
     5                                               Parameters
     6                                               ----------
     7                                               n : integer
     8                                                   size
     9                                               ti : float
    10                                                   current time
    11                                               dt : array
    12                                                   difference
    13                                               """
    14    250733   22336006.0     89.1      0.2      for i in range(1, n+1):
    15  12285917 1153899534.0     93.9      9.3          for j in range(1, n+1):
    16  12285917 11193868106.0    911.1     90.4              dt[i, j] = 0.25*(ti[i-1, j] + ti[i, j-1] + ti[i+1, j] + ti[i, j+1]) - ti[i, j]
    17                                                       
    18                                               # Update
    19      5117    7381007.0   1442.4      0.1      ti += dt
%lprun -T lprof_jacobi_v1.txt -f solve_laplace solve_laplace(49, jacobi_v1)
print(open('lprof_jacobi_v1.txt').read())
*** Profile printout saved to text file 'lprof_jacobi_v1.txt'. 
Timer unit: 1e-09 s

Total time: 0.124165 s
File: /tmp/ipykernel_63391/3211324933.py
Function: solve_laplace at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     1                                           def solve_laplace(n, solver, tol=1e-5, order='C'):
     2                                               """
     3                                               Laplace Equation solver
     4                                               
     5                                               Parameters
     6                                               ----------
     7                                               n : integer
     8                                                   size
     9                                               solver : function
    10                                                   iterative solver
    11                                               tol : float
    12                                                   tolerance
    13                                               order : string
    14                                                   'C' | 'F'
    15                                                   
    16                                               Returns
    17                                               -------
    18                                               err : float
    19                                                   residual
    20                                               """
    21         1       9899.0   9899.0      0.0      ti = np.zeros((n+2, n+2), order=order)
    22         1      19947.0  19947.0      0.0      dt = np.zeros((n+2, n+2), order=order)
    23                                           
    24         1        290.0    290.0      0.0      def bc(t):
    25                                                   t[-1, 1:-1] = 300
    26                                                   t[0, 1:-1] = 100
    27                                                   t[1:-1, -1] = 100
    28                                                   t[1:-1, 0] = 100
    29                                           
    30         1        110.0    110.0      0.0      err = 1
    31         1        110.0    110.0      0.0      hist = []
    32      5117     906762.0    177.2      0.7      while err > tol:
    33                                                   # Apply BC
    34      5117    9419200.0   1840.8      7.6          bc(ti)
    35                                           
    36                                                   # Run Gauss-Seidel
    37      5117   64892267.0  12681.7     52.3          solver(n, ti, dt)
    38                                           
    39                                                   # Compute Error
    40      5117   48916473.0   9559.6     39.4          err = linalg.norm(dt) / n
    41                                                   
    42         1         90.0     90.0      0.0      return err
%lprun -T lprof_jacobi_inner_v1.txt -f jacobi_v1 solve_laplace(49, jacobi_v1)
print(open('lprof_jacobi_inner_v1.txt').read())
*** Profile printout saved to text file 'lprof_jacobi_inner_v1.txt'. 
Timer unit: 1e-09 s

Total time: 0.0713991 s
File: /tmp/ipykernel_63391/3378516824.py
Function: jacobi_v1 at line 21

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    21                                           def jacobi_v1(n, ti, dt):
    22                                               """
    23                                               Jacobi method (Vector version)
    24                                               
    25                                               Parameters
    26                                               ----------
    27                                               ti : float
    28                                                   current time
    29                                                   
    30                                               Parameters
    31                                               -----------
    32                                               dt : array
    33                                                   difference
    34                                               """
    35      5117   64485401.0  12602.2     90.3      dt[1:-1, 1:-1] = 0.25*(ti[:-2, 1:-1] + ti[1:-1, :-2] + ti[2:, 1:-1] + ti[1:-1, 2:]) - ti[1:-1, 1:-1]
    36                                               
    37                                               # Update
    38      5117    6913738.0   1351.1      9.7      ti += dt

코드 최적화#

  • Hot-spot을 빠르게 구현할 방법을 고안

    • Python For loop가 매우 느려서 나타나는 상황임

  • 가속화 #1

    • Fortran, C/C++ 함수를 구현한 후 파이썬에 연동

src="""
! Jacobi Method
subroutine jacobi(n, ti, dt)
implicit none
integer, intent(in) :: n
real(8), dimension(:, :), intent(inout) :: ti
real(8), dimension(:, :), intent(inout) :: dt

integer :: i, j
do j = 2, n+1
  do i = 2, n+1
    dt(i,j) = 0.25*(ti(i-1,j) + ti(i,j-1) + ti(i+1,j) + ti(i, j+1)) - ti(i,j)
  enddo
enddo

! Update
do j = 1, n+2
  do i = 1, n+2
    ti(i,j) = ti(i,j) + dt(i,j)
  enddo
enddo

end subroutine jacobi
"""

# Write f90 code
with open('fjacobi.f90', 'w') as f:
    f.write(src)
# Make fortran module
!f2py -c -m fsolver fjacobi.f90
running build
running config_cc
unifing config_cc, config, build_clib, build_ext, build commands --compiler options
running config_fc
unifing config_fc, config, build_clib, build_ext, build commands --fcompiler options
running build_src
build_src
building extension "fsolver" sources
f2py options: []
f2py:> /tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolvermodule.c
creating /tmp/tmpui9nfs8q/src.linux-x86_64-3.10
Reading fortran codes...
	Reading file 'fjacobi.f90' (format:free)
Post-processing...
	Block: fsolver
			Block: jacobi
Post-processing (stage 2)...
Building modules...
	Building module "fsolver"...
		Creating wrapper for Fortran subroutine "jacobi"("jacobi")...
		Constructing wrapper function "jacobi"...
		  jacobi(n,ti,dt)
	Wrote C/API module "fsolver" to file "/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolvermodule.c"
	Fortran 90 wrappers are saved to "/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolver-f2pywrappers2.f90"
  adding '/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fortranobject.c' to sources.
  adding '/tmp/tmpui9nfs8q/src.linux-x86_64-3.10' to include_dirs.
copying /usr/lib/python3/dist-packages/numpy/f2py/src/fortranobject.c -> /tmp/tmpui9nfs8q/src.linux-x86_64-3.10
copying /usr/lib/python3/dist-packages/numpy/f2py/src/fortranobject.h -> /tmp/tmpui9nfs8q/src.linux-x86_64-3.10
  adding '/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolver-f2pywrappers2.f90' to sources.
build_src: building npy-pkg config files
running build_ext
customize UnixCCompiler
customize UnixCCompiler using build_ext
get_default_fcompiler: matching types: '['gnu95', 'intel', 'lahey', 'pg', 'nv', 'absoft', 'nag', 'vast', 'compaq', 'intele', 'intelem', 'gnu', 'g95', 'pathf95', 'nagfor', 'fujitsu']'
customize Gnu95FCompiler
Found executable /usr/bin/gfortran
customize Gnu95FCompiler
customize Gnu95FCompiler using build_ext
building 'fsolver' extension
compiling C sources
C compiler: x86_64-linux-gnu-gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC

creating /tmp/tmpui9nfs8q/tmp
creating /tmp/tmpui9nfs8q/tmp/tmpui9nfs8q
creating /tmp/tmpui9nfs8q/tmp/tmpui9nfs8q/src.linux-x86_64-3.10
compile options: '-DNPY_DISABLE_OPTIMIZATION=1 -I/tmp/tmpui9nfs8q/src.linux-x86_64-3.10 -I/usr/lib/python3/dist-packages/numpy/core/include -I/usr/include/python3.10 -c'
x86_64-linux-gnu-gcc: /tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolvermodule.c
x86_64-linux-gnu-gcc: /tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fortranobject.c
In file included from /usr/lib/python3/dist-packages/numpy/core/include/numpy/ndarraytypes.h:1969,
                 from /usr/lib/python3/dist-packages/numpy/core/include/numpy/ndarrayobject.h:12,
                 from /usr/lib/python3/dist-packages/numpy/core/include/numpy/arrayobject.h:4,
                 from /tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fortranobject.h:13,
                 from /tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fortranobject.c:2:
/usr/lib/python3/dist-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h:17:2: warning: #warning "Using deprecated NumPy API, disable it with " "#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION" []8;;https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wcpp-Wcpp]8;;]
   17 | #warning "Using deprecated NumPy API, disable it with " \
      |  ^~~~~~~
In file included from /usr/lib/python3/dist-packages/numpy/core/include/numpy/ndarraytypes.h:1969,
                 from /usr/lib/python3/dist-packages/numpy/core/include/numpy/ndarrayobject.h:12,
                 from /usr/lib/python3/dist-packages/numpy/core/include/numpy/arrayobject.h:4,
                 from /tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fortranobject.h:13,
                 from /tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolvermodule.c:16:
/usr/lib/python3/dist-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h:17:2: warning: #warning "Using deprecated NumPy API, disable it with " "#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION" []8;;https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wcpp-Wcpp]8;;]
   17 | #warning "Using deprecated NumPy API, disable it with " \
      |  ^~~~~~~
/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolvermodule.c:137:12: warning: ‘f2py_size’ defined but not used []8;;https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wunused-function-Wunused-function]8;;]
  137 | static int f2py_size(PyArrayObject* var, ...)
      |            ^~~~~~~~~
compiling Fortran sources
Fortran f77 compiler: /usr/bin/gfortran -Wall -g -ffixed-form -fno-second-underscore -fPIC -O3 -funroll-loops
Fortran f90 compiler: /usr/bin/gfortran -Wall -g -fno-second-underscore -fPIC -O3 -funroll-loops
Fortran fix compiler: /usr/bin/gfortran -Wall -g -ffixed-form -fno-second-underscore -Wall -g -fno-second-underscore -fPIC -O3 -funroll-loops
compile options: '-I/tmp/tmpui9nfs8q/src.linux-x86_64-3.10 -I/usr/lib/python3/dist-packages/numpy/core/include -I/usr/include/python3.10 -c'
gfortran:f90: fjacobi.f90
gfortran:f90: /tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolver-f2pywrappers2.f90
/usr/bin/gfortran -Wall -g -Wall -g -shared /tmp/tmpui9nfs8q/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolvermodule.o /tmp/tmpui9nfs8q/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fortranobject.o /tmp/tmpui9nfs8q/fjacobi.o /tmp/tmpui9nfs8q/tmp/tmpui9nfs8q/src.linux-x86_64-3.10/fsolver-f2pywrappers2.o -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -o ./fsolver.cpython-310-x86_64-linux-gnu.so
Removing build directory /tmp/tmpui9nfs8q
# Import Fortran Jacobi solver
from fsolver import jacobi as fjacobi
%time solve_laplace(49, fjacobi, order='F')
CPU times: user 59.1 ms, sys: 16 ms, total: 75.1 ms
Wall time: 68.8 ms
9.997943770878389e-06
%prun -T prof_fjacobi.txt solve_laplace(49, fjacobi, order='F')
print(open('prof_fjacobi.txt').read())
 
*** Profile printout saved to text file 'prof_fjacobi.txt'. 
         97229 function calls (92112 primitive calls) in 0.098 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.017    0.017    0.098    0.098 3211324933.py:1(solve_laplace)
     5117    0.014    0.000    0.029    0.000 linalg.py:2363(norm)
     5117    0.013    0.000    0.027    0.000 function_base.py:422(asarray_chkfinite)
     5117    0.010    0.000    0.010    0.000 3211324933.py:24(bc)
     5117    0.010    0.000    0.010    0.000 {method 'reduce' of 'numpy.ufunc' objects}
10234/5117    0.010    0.000    0.031    0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}
     5117    0.008    0.000    0.071    0.000 _misc.py:17(norm)
     5117    0.003    0.000    0.035    0.000 <__array_function__ internals>:2(norm)
     5117    0.002    0.000    0.011    0.000 <__array_function__ internals>:2(dot)
     5117    0.002    0.000    0.013    0.000 {method 'all' of 'numpy.ndarray' objects}
     5117    0.001    0.000    0.012    0.000 _methods.py:60(_all)
    10234    0.001    0.000    0.001    0.000 {built-in method numpy.asarray}
     5117    0.001    0.000    0.001    0.000 {method 'ravel' of 'numpy.ndarray' objects}
     5117    0.001    0.000    0.001    0.000 linalg.py:112(isComplexType)
    10234    0.001    0.000    0.001    0.000 {built-in method builtins.issubclass}
     5117    0.001    0.000    0.001    0.000 multiarray.py:736(dot)
     5117    0.001    0.000    0.001    0.000 linalg.py:2359(_norm_dispatcher)
        2    0.000    0.000    0.000    0.000 {built-in method numpy.zeros}
        1    0.000    0.000    0.098    0.098 {built-in method builtins.exec}
        1    0.000    0.000    0.098    0.098 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
%lprun -T lprof_fjacobi.txt -f solve_laplace solve_laplace(49, fjacobi, order='F')
print(open('lprof_fjacobi.txt').read())
*** Profile printout saved to text file 'lprof_fjacobi.txt'. 
Timer unit: 1e-09 s

Total time: 0.114883 s
File: /tmp/ipykernel_63391/3211324933.py
Function: solve_laplace at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     1                                           def solve_laplace(n, solver, tol=1e-5, order='C'):
     2                                               """
     3                                               Laplace Equation solver
     4                                               
     5                                               Parameters
     6                                               ----------
     7                                               n : integer
     8                                                   size
     9                                               solver : function
    10                                                   iterative solver
    11                                               tol : float
    12                                                   tolerance
    13                                               order : string
    14                                                   'C' | 'F'
    15                                                   
    16                                               Returns
    17                                               -------
    18                                               err : float
    19                                                   residual
    20                                               """
    21         1      10970.0  10970.0      0.0      ti = np.zeros((n+2, n+2), order=order)
    22         1      36369.0  36369.0      0.0      dt = np.zeros((n+2, n+2), order=order)
    23                                           
    24         1        341.0    341.0      0.0      def bc(t):
    25                                                   t[-1, 1:-1] = 300
    26                                                   t[0, 1:-1] = 100
    27                                                   t[1:-1, -1] = 100
    28                                                   t[1:-1, 0] = 100
    29                                           
    30         1        120.0    120.0      0.0      err = 1
    31         1        120.0    120.0      0.0      hist = []
    32      5117    1210287.0    236.5      1.1      while err > tol:
    33                                                   # Apply BC
    34      5117   12035320.0   2352.0     10.5          bc(ti)
    35                                           
    36                                                   # Run Gauss-Seidel
    37      5117   12221519.0   2388.4     10.6          solver(n, ti, dt)
    38                                           
    39                                                   # Compute Error
    40      5117   89368076.0  17464.9     77.8          err = linalg.norm(dt) / n
    41                                                   
    42         1        100.0    100.0      0.0      return err
  • 가속화 #2

    • Numba 등 파이썬 가속 패키지 활용

      • 파이썬 코드를 LLVM toolchain을 이용하여 가속화

      • Just-in Time (JIT) complication

import numba as nb
@nb.njit(fastmath=True)
def jacobi_nb(n, ti, dt):
    """
    Jacobi method
    
    Parameters
    ----------
    n : integer
        size
    ti : float
        current time
    dt : array
        difference
    """
    for i in range(1, n+1):
        for j in range(1, n+1):
            dt[i, j] = 0.25*(ti[i-1, j] + ti[i, j-1] + ti[i+1, j] + ti[i, j+1]) - ti[i, j]
            
    # Update
    ti += dt
%time solve_laplace(49, jacobi_nb)
CPU times: user 197 ms, sys: 4 ms, total: 202 ms
Wall time: 201 ms
9.997943770878389e-06
%prun -T prof_jacobi_nb.txt solve_laplace(49, jacobi_nb)
print(open('prof_jacobi_nb.txt').read())
 
*** Profile printout saved to text file 'prof_jacobi_nb.txt'. 
         102346 function calls (97229 primitive calls) in 0.106 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     5117    0.031    0.000    0.031    0.000 3143608721.py:1(jacobi_nb)
     5117    0.012    0.000    0.024    0.000 linalg.py:2363(norm)
     5117    0.011    0.000    0.022    0.000 function_base.py:422(asarray_chkfinite)
10234/5117    0.009    0.000    0.026    0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}
        1    0.009    0.009    0.106    0.106 3211324933.py:1(solve_laplace)
     5117    0.008    0.000    0.008    0.000 3211324933.py:24(bc)
     5117    0.008    0.000    0.008    0.000 {method 'reduce' of 'numpy.ufunc' objects}
     5117    0.007    0.000    0.058    0.000 _misc.py:17(norm)
     5117    0.003    0.000    0.030    0.000 <__array_function__ internals>:2(norm)
     5117    0.002    0.000    0.009    0.000 <__array_function__ internals>:2(dot)
     5117    0.001    0.000    0.011    0.000 {method 'all' of 'numpy.ndarray' objects}
     5117    0.001    0.000    0.009    0.000 _methods.py:60(_all)
    10234    0.001    0.000    0.001    0.000 {built-in method numpy.asarray}
     5117    0.001    0.000    0.001    0.000 {method 'ravel' of 'numpy.ndarray' objects}
     5117    0.001    0.000    0.001    0.000 linalg.py:112(isComplexType)
    10234    0.001    0.000    0.001    0.000 {built-in method builtins.issubclass}
     5117    0.001    0.000    0.001    0.000 linalg.py:2359(_norm_dispatcher)
     5117    0.001    0.000    0.001    0.000 multiarray.py:736(dot)
        2    0.000    0.000    0.000    0.000 {built-in method numpy.zeros}
        1    0.000    0.000    0.106    0.106 {built-in method builtins.exec}
        1    0.000    0.000    0.106    0.106 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
%lprun -T lprof_jacobi_nb.txt -f solve_laplace solve_laplace(49, jacobi_nb)
print(open('lprof_jacobi_nb.txt').read())
*** Profile printout saved to text file 'lprof_jacobi_nb.txt'. 
Timer unit: 1e-09 s

Total time: 0.126012 s
File: /tmp/ipykernel_63391/3211324933.py
Function: solve_laplace at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     1                                           def solve_laplace(n, solver, tol=1e-5, order='C'):
     2                                               """
     3                                               Laplace Equation solver
     4                                               
     5                                               Parameters
     6                                               ----------
     7                                               n : integer
     8                                                   size
     9                                               solver : function
    10                                                   iterative solver
    11                                               tol : float
    12                                                   tolerance
    13                                               order : string
    14                                                   'C' | 'F'
    15                                                   
    16                                               Returns
    17                                               -------
    18                                               err : float
    19                                                   residual
    20                                               """
    21         1      17343.0  17343.0      0.0      ti = np.zeros((n+2, n+2), order=order)
    22         1      21340.0  21340.0      0.0      dt = np.zeros((n+2, n+2), order=order)
    23                                           
    24         1        661.0    661.0      0.0      def bc(t):
    25                                                   t[-1, 1:-1] = 300
    26                                                   t[0, 1:-1] = 100
    27                                                   t[1:-1, -1] = 100
    28                                                   t[1:-1, 0] = 100
    29                                           
    30         1        281.0    281.0      0.0      err = 1
    31         1        260.0    260.0      0.0      hist = []
    32      5117    1200012.0    234.5      1.0      while err > tol:
    33                                                   # Apply BC
    34      5117   11668397.0   2280.3      9.3          bc(ti)
    35                                           
    36                                                   # Run Gauss-Seidel
    37      5117   33923833.0   6629.6     26.9          solver(n, ti, dt)
    38                                           
    39                                                   # Compute Error
    40      5117   79179650.0  15473.8     62.8          err = linalg.norm(dt) / n
    41                                                   
    42         1        100.0    100.0      0.0      return err

좀 더 큰 문제 시간 비교#

  • Pure Python 보다 현격하게 빠름

  • Fortran 결과가 더 빠르나, 이는 포트란 컴파일러와 LLVM 컴파일러 버전에 따라 다를 수 있음

n = 199
%time solve_laplace(n, jacobi_v1)
%time solve_laplace(n, fjacobi, order='F')
%time solve_laplace(n, jacobi_nb)
CPU times: user 2min 9s, sys: 1.94 s, total: 2min 11s
Wall time: 8.22 s
CPU times: user 1min 4s, sys: 840 ms, total: 1min 4s
Wall time: 4.06 s
CPU times: user 1min 46s, sys: 1.42 s, total: 1min 48s
Wall time: 6.77 s
9.999287364322627e-06